vp_raw <- read.csv("mega-veridicality-v2.csv")
emotion_raw <- read.csv("BRM-emot-submit.csv")
emotion_select <- select(emotion_raw, "Word", "V.Mean.Sum", "V.SD.Sum", "A.Mean.Sum", "A.SD.Sum")
vp_data <- select(vp_raw, "participant", "verb", "frame", "voice", "polarity", "conditional", "sentence", "veridicality", "acceptability", "exclude")

Norming valence mean to get positive and negative valence

emotion <- emotion_select %>%
  mutate(valence_scaled = scale(V.Mean.Sum))
word_list <- unique(emotion$Word)
# Need to filter by acceptability (see 2019 paper)
# Need to normalize by participants (2016 paper used "ordinal model-based normalization procedure")
vp_data <- vp_data %>%
  mutate(veridicality_num = ifelse(veridicality == "yes", 1, ifelse(veridicality == "no", -1, 0))) %>%
  filter(exclude == "False") %>%
  mutate(Word = verb) %>%
  filter(Word %in% word_list)

create relevant subset for veridicality ratings

example of items
Frame Voice Sentence
that_S active Someone cared that a particular thing happened. Did that thing happen?
that_S passive Someone was revolted that a particular thing happened. Did that thing happen?
to_VPeventive active A particular person loved to do a particular thing. Did that person do that thing?
to_VPeventive passive A particular person was seen to do a particular thing. Did that person do that thing?
to_VPstative active A particular person opted to have a particular thing. Did that person have that thing?
to_VPstative passive A particular person was inspired to have a particular thing. Did that person have that thing?
for_NP_to_VP active Someone pressed for a particular thing to happen. Did that thing happen?
for_NP_to_VP passive N/A
NP_to_VPeventive active Someone contracted a particular person to do a particular thing. Did that person have that thing?
NP_to_VPeventive passive N/A
NP_to_VPstative active Someone badgered a particular person to have a particular thing. Did that person have that thing?
NP_to_VPstative passive N/A
# the White paper (2016) uses only observations with case "that_S" to calculate veridicality, not sure if this is important
veridicality_filter <- vp_data %>%
  filter(polarity == "positive" & conditional == "False") 
  
# veridicality_ratings <- veridicality_filter %>%
#   multi_boot_standard(col = "veridicality_num") %>%
#   mutate(veridicality_mean = mean, YMin = mean - ci_lower, YMax = mean + ci_upper) %>%
#   ungroup(Word, voice) %>%
#   mutate(Word = fct_reorder(as.factor(Word), mean))

veridicality_ratings <- veridicality_filter %>%
  group_by(Word, voice) %>%
  # multi_boot_standard(col = "veridicality_num") %>%
  summarise(veridicality_mean = mean(veridicality_num)) %>%
  ungroup() %>%
  mutate(Word = fct_reorder(Word, veridicality_mean))

veridicality <- merge(veridicality_ratings, emotion, by = "Word", all.x = TRUE) 
veridicality <- veridicality %>%
  rename(valence_mean = valence_scaled, arousal_mean = A.Mean.Sum, valence_SD = V.SD.Sum, arousal_SD = A.SD.Sum) %>%
  mutate(valence_group = ifelse(valence_mean < 0, "negative", "positive"))

ggplot(data = veridicality, aes(x = valence_mean, y = veridicality_mean, colour = valence_group, label = Word)) +
  #geom_point(width = .3,height = .025) +
  geom_label() + 
  geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

ggplot(data = veridicality, aes(x = arousal_mean, y = veridicality_mean, colour = valence_group, label = Word)) +
  #geom_point(width = .3,height = .025) + 
  facet_grid(~valence_group) +
  geom_label() + 
  geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

ggplot(data = veridicality, aes(x = arousal_mean, y = veridicality_mean, colour = valence_group, label = Word)) +
  #geom_point(width = .3,height = .025) + 
  facet_grid(voice~valence_group) +
  geom_label() + 
  geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

{r} # veridicality$relativeValence = abs(veridicality$valence_mean) # m = lm(mean ~ relativeValence + arousal_mean, data = veridicality) # summary(m) #

create relevant subset for projectivity ratings

examples of items:
Frame Voice Valence Sentence Note
that_S active positive If John didn’t find that a particular thing happened, did that thing happen? arousal low, projectivity low
that_S passive negative If John wasn’t scared that a particular thing happened, did that thing happen? arousal high, projectivity high
that_S active positive If John didn’t discover that a particular thing happened, did that thing happen? outlier: arousal high, projectivity low
that_S passive positive If John wasn’t jarred that a particular thing happened, did that thing happen? outlier: arousal low, projectivity high
that_S active negative If John didn’t resent that a particular thing happened, did that thing happen? outlier: arousal low, projectivity high
that_S passive negative If John wasn’t tricked that a particular thing happened, did that thing happen? outlier: arousal high, projectivity low
projectivity_filter <- vp_data %>%
  filter(polarity == "negative" & conditional == "True") 

# projectivity_ratings <- projectivity_filter %>%
#   multi_boot_standard(col = "veridicality_num") %>%
#   mutate(projectivity_mean = mean, YMin = mean - ci_lower, YMax = mean + ci_upper) %>%
#   ungroup(Word, voice) %>%
#   mutate(Word = fct_reorder(as.factor(Word), mean))

projectivity_ratings <- projectivity_filter %>%
  group_by(Word, voice) %>%
  # multi_boot_standard(col = "veridicality_num") %>%
  summarise(projectivity_mean = mean(veridicality_num)) %>%
  ungroup() %>%
  mutate(Word = fct_reorder(Word, projectivity_mean))

projectivity <- merge(projectivity_ratings, emotion, by = "Word", all.x = TRUE) 

projectivity <- projectivity %>%
  rename(valence_mean = valence_scaled, arousal_mean = A.Mean.Sum, valence_SD = V.SD.Sum, arousal_SD = A.SD.Sum) %>%
  mutate(valence_group = ifelse(valence_mean < 0, "negative", "positive"))

ggplot(data = projectivity, aes(x = valence_mean, y = projectivity_mean, colour = valence_group, label = Word)) +
  #geom_point(width = .3,height = .025) +
  geom_label() +
  geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

ggplot(data = projectivity, aes(x = arousal_mean, y = projectivity_mean, colour = valence_group, label = Word)) +
  #geom_point(width = .3,height = .025) + 
  facet_grid(~valence_group) +
  geom_label() +
  geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

# taking into account voice
ggplot(data = projectivity, aes(x = arousal_mean, y = projectivity_mean, colour = valence_group, label = Word)) +
  #geom_point(width = .3,height = .025) + 
  facet_grid(voice~valence_group) +
  geom_label() +
  geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

projectivity against arousal_mean, by valence_bins

projectivity_neg <- projectivity %>%
  filter(valence_group == "negative")
projectivity_neg <-projectivity_neg[order(projectivity_neg$valence_mean),]
projectivity_neg[c("valence_bin")] <- 0
sep_neg = nrow(projectivity_neg) / 3 # 63.66667
for (i in 1:nrow(projectivity_neg)) {
  if (i <= sep_neg) {
    projectivity_neg[i, "valence_bin"] <- "very negative" #contains 63 items
  } else if (i <= 2 * sep_neg) {
    projectivity_neg[i, "valence_bin"] <- "moderately negative" # contains 64 items
  } else {
    projectivity_neg[i, "valence_bin"] <- "slightly negative" # contains 64 items
  }
}

projectivity_pos <- projectivity %>%
  filter(valence_group == "positive")
projectivity_pos <-projectivity_pos[order(projectivity_pos$valence_mean),]
projectivity_pos[c("valence_bin")] <- 0
sep_pos = nrow(projectivity_pos) / 3 # 77.3333
for (i in 1:nrow(projectivity_pos)) {
  if (i <= sep_pos) {
    projectivity_pos[i, "valence_bin"] <- "slightly positive" # contains 77 items 
  } else if (i <= 2 * sep_pos) {
    projectivity_pos[i, "valence_bin"] <- "moderately positive" # contains 77 items
  } else {
    projectivity_pos[i, "valence_bin"] <- "very positive" # contains 78 items
  }
}

projectivity_bin <- rbind(projectivity_neg, projectivity_pos)
projectivity_bin$valence_bin_f = factor(projectivity_bin$valence_bin, levels=c("very negative", "moderately negative", "slightly negative", "slightly positive", "moderately positive", "very positive"))

ggplot(data = projectivity_bin, aes(x = arousal_mean, y = projectivity_mean, colour = valence_bin_f, label = Word)) +
  #geom_point(width = .3,height = .025) + 
  facet_grid(~valence_bin_f) +
  geom_label() +
  geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

# taking into account voice
ggplot(data = projectivity_bin, aes(x = arousal_mean, y = projectivity_mean, colour = valence_bin_f, label = Word)) +
  #geom_point(width = .3,height = .025) + 
  facet_grid(voice~valence_bin_f) +
  geom_label() +
  geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

{r} # projectivity$relativeValence = abs(projectivity$valence_mean) # m = lm(mean ~ relativeValence + arousal_mean, data = projectivity) # summary(m) #

projectivity_ratings_participant <- projectivity_filter

projectivity_participant <- merge(projectivity_ratings_participant, emotion, by = "Word", all.x = TRUE) 
projectivity_participant <- projectivity_participant %>%
  rename(valence_mean = valence_scaled, arousal_mean = A.Mean.Sum, valence_SD = V.SD.Sum, arousal_SD = A.SD.Sum) %>%
  mutate(valence_group = ifelse(valence_mean < 0, "negative", "positive"))

# the model check of the model as previously formulated (ie, with uncentered predictors) suggested huge collinearity, almost all values in covariance matrix > .4). the current model check still suggests there's sth wonky going on with the homoscedasticity tests (because the outcome variable is a three-way categorical variable instead of continuous, which we can't do anything about within the model, but seee logistic model below that replicates the result). 
projectivity_participant$relativeValence = abs(projectivity_participant$valence_mean)

# visualize correlations between variables
pairscor.fnc(projectivity_participant[,c("veridicality_num","arousal_mean","relativeValence","valence_mean")])

# center predictors to reduce collinearity
projectivity_participant = projectivity_participant %>%
  mutate(crelativeValence = relativeValence-mean(relativeValence),carousal_mean=arousal_mean-mean(arousal_mean))

m = lmer(veridicality_num ~ carousal_mean * crelativeValence + (1 | participant) + (1 | Word), data = projectivity_participant)
summary(m) # main effects of arousal and relative valence!
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## veridicality_num ~ carousal_mean * crelativeValence + (1 | participant) +  
##     (1 | Word)
##    Data: projectivity_participant
## 
## REML criterion at convergence: 6062.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7903 -0.5219 -0.0759  0.5784  3.6057 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  Word        (Intercept) 0.05401  0.2324  
##  participant (Intercept) 0.04782  0.2187  
##  Residual                0.18567  0.4309  
## Number of obs: 4450, groups:  Word, 423; participant, 160
## 
## Fixed effects:
##                                Estimate Std. Error t value
## (Intercept)                    0.211080   0.022423   9.413
## carousal_mean                  0.054950   0.016052   3.423
## crelativeValence               0.182165   0.023798   7.655
## carousal_mean:crelativeValence 0.008049   0.022795   0.353
## 
## Correlation of Fixed Effects:
##             (Intr) crsl_m crltvV
## carousal_mn  0.060              
## crelatvVlnc  0.046 -0.361       
## crsl_mn:crV -0.245 -0.214 -0.202
pdf(file="modelcheck_linear.pdf",height=8,width=9)
check_model(m)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 4450 rows containing missing values (geom_text_repel).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen 
##                 2
# create a categorical projectivity variable that treats only "yes" responses as projective, all others as not projective
projectivity_participant$categorical_projectivity = as.factor(ifelse(projectivity_participant$veridicality_num == 1, "projective","non-projective"))

# almost three times as many non-projective compared to projective responses
table(projectivity_participant$categorical_projectivity)
## 
## non-projective     projective 
##           3176           1274
prop.table(table(projectivity_participant$categorical_projectivity))
## 
## non-projective     projective 
##      0.7137079      0.2862921
m = glmer(categorical_projectivity ~ carousal_mean * crelativeValence + (1 | participant) + (1 | Word), data = projectivity_participant, family="binomial")
summary(m) # main effects of arousal and relative valence!
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: categorical_projectivity ~ carousal_mean * crelativeValence +  
##     (1 | participant) + (1 | Word)
##    Data: projectivity_participant
## 
##      AIC      BIC   logLik deviance df.resid 
##   4045.9   4084.3  -2016.9   4033.9     4444 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1072 -0.4152 -0.2109  0.3425  8.4193 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  Word        (Intercept) 2.719    1.649   
##  participant (Intercept) 1.707    1.307   
## Number of obs: 4450, groups:  Word, 423; participant, 160
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -1.68140    0.15224 -11.045  < 2e-16 ***
## carousal_mean                   0.49379    0.11782   4.191 2.78e-05 ***
## crelativeValence                1.26659    0.17407   7.276 3.43e-13 ***
## carousal_mean:crelativeValence -0.08131    0.16765  -0.485    0.628    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) crsl_m crltvV
## carousal_mn  0.026              
## crelatvVlnc -0.028 -0.305       
## crsl_mn:crV -0.238 -0.269 -0.216